/*
A Pandemic Crossing the Border: The Impact of Covid-19 in the US on the Mexican Labor Market
Rojas and Yu, 2023

Replication file for Figure 2
*/

clear all
set more off

global data "COMPLETE YOUR DIRECTORY HERE \data"
global output "COMPLETE YOUR DIRECTORY HERE \output"

**List of years to be used
set seed 422
set obs 10
egen year=seq()
replace year=year+2010
drop if year==2018 | year==2019

*Groups of 3
forval i=1/20{
gen r`i'=runiform()
sort r`i'
replace r`i'=. if _n>3
sort year
}

*Groups of 4
forval i=21/40{
gen r`i'=runiform()
sort r`i'
replace r`i'=. if _n>4
sort year
}

*Groups of 5
forval i=41/60{
gen r`i'=runiform()
sort r`i'
replace r`i'=. if _n>5
sort year
}

forval j=1/60{
	replace r`j'=year if r`j'!=.
	gen select`j'=1 if r`j'!=.
}

gen r61=year
gen select61=1

tempfile yearsrandom
save `yearsrandom', replace


mat results=J(16, 61, .)

forval k=1/61{
	
	/*
	With the given set of years, build the analysis dataset
	*/
	
	* Collapse EMIF data
	import delimited "$data/emif_south_to_north_mun.csv", encoding(ISO-8859-2) clear 

	* Total travelers per trip
	gen total_travelers=members+1

	* drop missing us states
	drop if us_state_name=="Not Specific State"
	
	* Merge random years
	rename year r`k'
	merge n:n r`k' using `yearsrandom', keepusing(select`k') nogen
	drop if select`k'==.

	* Collapse
	collapse (sum) total_travelers, by(us_state_name mx_mun)
	gen state=lower(us_state_name)

	* Expand
	expand 224
	bys state mx_mun: gen time=_n

	* Merge with COVID data
	merge m:1 state time using "$data/daily_covid_pop.dta"
	keep if _merge==3

	* Trip shares
	bys mx_mun time: egen outflow=sum(total_travelers)
	gen t_share=total_travelers/outflow
	label var t_share "Number of travelers/total outflow"

	* Covid exposure (per 10,000)
	gen covid_exp=t_share*(confirmed/popestimate2019)*10000

	* Collapse at mun level
	keep mx_mun total_travelers covid_exp date month year time
	collapse (sum) total_travelers covid_exp, by(mx_mun date month year time)

	tempfile temp
	save `temp', replace

	* Municipality characteristics
	import delimited "$data/mx_mun_characteristics.csv", clear
	keep mx_mun pop* migrants*

	replace pop2010="." if pop2010=="NA"
	replace migrants2010="." if migrants2010=="NA"
	replace migrants2010_rate="." if migrants2010=="NA"

	destring pop2010, replace
	destring migrants2010, replace
	destring migrants2010_rate, replace

	tempfile temp2
	save `temp2', replace

	* Mexico individual-level labor data
	import delimited "$data/enoe_individual_data.csv", clear

	* Time vars
	gen str_qtr=string(qtr)
	gen quarter=substr(str_qtr, 1, 1)
	gen year=substr(str_qtr, 2, 5)
	destring quarter, replace
	destring year, replace

	gen tempdate=date(int_date, "YMD")
	gen month=month(tempdate)
	gen date=day(tempdate)

	* Merge datasets
	merge m:1 mx_mun using `temp2'
	keep if _merge==3
	drop _merge

	merge m:1 mx_mun year month date using `temp'
	drop if _merge==2
	rename _merge merge_emif

	* Missing EMIF indicator
	gen missing_emif=0
	replace missing_emif=1 if merge_emif==1 & year==2020 & month>3
	replace missing_emif=1 if merge_emif==1 & year==2020 & month==3 & date>22

	* Covid/SI missing to zero (treat march or before as zeros)
	replace covid_exp=0 if covid_exp==. & missing_emif==0
	
	* Average migration rate
	gen migrants_rate_avg=.
	replace migrants_rate_avg=10000*(migrants+migrants2010)/(pop+pop2010) if pop2010!=.
	replace migrants_rate_avg=10000*(migrants)/(pop) if pop2010==.

	* Covid and SI weighted by migration rate (per 10,000)
	gen covid_exp_w=covid_exp*migrants_rate_avg

	* Clean MX covid variables (per 10,000)
	replace acc_cases="." if acc_cases=="NA"
	destring acc_cases, replace

	replace ma7d_cases="." if ma7d_cases=="NA"
	destring ma7d_cases, replace

	gen covid_exp_mx=10000*(ma7d_cases/pop)
	replace covid_exp_mx=0 if covid_exp_mx==.

	*Do analysis with panel at hand

	destring monthincomeifwork whrsifwork hrwageifwork, replace force

	gen t=.
	replace t=1 if qtr==12019
	replace t=2 if qtr==22019
	replace t=3 if qtr==32019
	replace t=4 if qtr==42019
	replace t=5 if qtr==12020
	replace t=6 if qtr==32020
	replace t=7 if qtr==42020

	label define quarter_lab 1 "Q1-2019" 2 "Q2-2019" 3 "Q3-2019" 4 "Q4-2019" 5 "Q1-2020" 6 "Q3-2020" 7 "Q4-2020"
	label values t quarter_lab

	egen id2=group(id)
	egen t2=group(int_date)

	drop if telephone==1
	drop if missing_emif==1

	xtset id2 t

	gen migrants_high=(migrants_rate_q=="q3" | migrants_rate_q=="q4")

	keep if status1=="active"

	/*
	Repeat main analysis with dataset at hand
	*/
	
	**All
	preserve
	collapse (mean) hrwage whrs covid_exp_w covid_exp_mx if hrwage>0 [aw=weight] , by(mx_mun t)
	reghdfe hrwage covid_exp_w, absorb(mx_mun t) cluster(mx_mun)
	mat results[1,`k']=_b[covid_exp_w]
	mat results[2,`k']=_se[covid_exp_w]
	mat results[3,`k']=_b[covid_exp_w] - invttail(e(df_r),0.025)*_se[covid_exp_w]
	mat results[4,`k']=_b[covid_exp_w] + invttail(e(df_r),0.025)*_se[covid_exp_w]
	
	reghdfe hrwage covid_exp_w covid_exp_mx, absorb(mx_mun t) cluster(mx_mun)
	mat results[5,`k']=_b[covid_exp_w]
	mat results[6,`k']=_se[covid_exp_w]
	mat results[7,`k']=_b[covid_exp_w] - invttail(e(df_r),0.025)*_se[covid_exp_w]
	mat results[8,`k']=_b[covid_exp_w] + invttail(e(df_r),0.025)*_se[covid_exp_w]
	
	restore

	preserve
	collapse (mean) hrwage whrs covid_exp_w covid_exp_mx [aw=weight], by(mx_mun t)
	reghdfe whrs covid_exp_w, absorb(mx_mun t) cluster(mx_mun)
	mat results[9,`k']=_b[covid_exp_w]
	mat results[10,`k']=_se[covid_exp_w]
	mat results[11,`k']=_b[covid_exp_w] - invttail(e(df_r),0.025)*_se[covid_exp_w]
	mat results[12,`k']=_b[covid_exp_w] + invttail(e(df_r),0.025)*_se[covid_exp_w]
	
	reghdfe whrs covid_exp_w covid_exp_mx, absorb(mx_mun t) cluster(mx_mun)
	mat results[13,`k']=_b[covid_exp_w]
	mat results[14,`k']=_se[covid_exp_w]
	mat results[15,`k']=_b[covid_exp_w] - invttail(e(df_r),0.025)*_se[covid_exp_w]
	mat results[16,`k']=_b[covid_exp_w] + invttail(e(df_r),0.025)*_se[covid_exp_w]
	
	restore
}

**Put in matrices

mat Mwages_betas = [results[5,1..61]]
mat Mhours_betas = [results[13,1..61]]

mat Mwages_ci = [results[7..8,1..61]]
mat Mhours_ci = [results[15..16,1..61]]

preserve
matsave Mwages_betas, replace p("$output") dropall
matsave Mhours_betas, replace p("$output") dropall
matsave Mwages_ci, replace p("$output") dropall
matsave Mhours_ci, replace p("$output") dropall
restore


**Wages
coefplot matrix(Mwages_betas), ci(Mwages_ci) msymbol(O) msize(small) ///
	transform(* = min(max(@,-0.01),0.01)) ciopts(lwidth(vthin)) ///
	ylabel(none, labsize(small) noticks nogrid)  ytick(,nogrid) ///
	xlabel(, labsize(vsmall)) xlab(-.01(.005).01) xscale(range( -.01 .01)) ///
	graphregion(color(white) fcolor(white) lcolor(white) ilcolor(white) ifcolor(white)) scheme(plotplain) ///
	xtitle("Estimated effect of US covid exposure on wages", size(small)) ytitle("Alternative migration weights with randomly selected years", size(small)) ///
	xline(0, lcolor(black) lwidth(medium))  ///
	legend(order(2 "Estimated" "effect" 1 "95% CI") rows(2) ring(0) position(2) size(small) region(fcolor(gs15))) ///
	ysize(5) xsize(3)
graph export "$output/robustness_weights_wages.png", replace 



**Hours
coefplot matrix(Mhours_betas), ci(Mhours_ci) msymbol(O) msize(small) ///
	transform(* = min(max(@,0),0.01)) ciopts(lwidth(vthin)) ///
	ylabel(none, labsize(small) noticks nogrid)  ytick(,nogrid) ///
	xlabel(, labsize(vsmall)) xlab(0(.005).01) xscale(range(0 .01)) ///
	graphregion(color(white) fcolor(white) lcolor(white) ilcolor(white) ifcolor(white)) scheme(plotplain) ///
	xtitle("Estimated effect of US covid exposure on worked hours", size(small)) ytitle("Alternative migration weights with randomly selected years", size(small)) ///
	xline(0, lcolor(black) lwidth(medium))  ///
	xline(0.00351, lcolor(maroon) lpattern(dash) lwidth(medium))  ///
	legend(order(2 "Estimated" "effect" 1 "95% CI") rows(2) ring(0) position(2) size(small) region(fcolor(gs15))) ///
	ysize(5) xsize(3)
graph export "$output/robustness_weights_hours.png", replace 

